# Load the inputs for this run. `cell_type_name` and `graph_weight` are
# defined earlier in the script and select which output files are read.
file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

# All assayed genes: one gene symbol per line.
assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag),
                     what = character(), sep = "\n")

# Gene sets: one set per line, symbols separated by commas.
gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag),
                 what = character(), sep = "\n")

# strsplit() is already vectorized and always returns a list with one
# character vector per input line; wrapping it in sapply() only worked
# through sapply's simplification rules.
gene_sets = strsplit(gene_sets, split = ",")

# Number of genes in each set (unnamed integer vector).
n_genes = lengths(gene_sets, use.names = FALSE)
summary(n_genes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 23.0 24.0 23.3 25.0 26.0
## [1] 40
## [1] 4 19 20 22 22 22 22 22 23 23 23 23 23 23 24 24 24 24 24 24 24 24 24 24 25
## [26] 25 25 25 25 25 25 25 25 25 25 25 25 25 25 26
bioMart.
All the gene symbols that can be found in bioMart are consistent with what
we have, so there is no need to run the query below.
# Look up every assayed gene in Ensembl (human) by external gene name and
# retrieve the matching HGNC symbol.
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
gene_BM = getBM(
  attributes = c("hgnc_symbol", "external_gene_name"),
  filters = "external_gene_name",
  values = assayed_genes,
  mart = ensembl
)

# Size of the query versus size (and a peek) of the returned table.
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2, ]

# How many assayed genes were found by bioMart.
table(assayed_genes %in% gene_BM[["external_gene_name"]])

# External gene names that were returned more than once.
t1 = table(gene_BM[["external_gene_name"]])
dup = names(t1)[t1 > 1]
gene_BM[gene_BM[["external_gene_name"]] %in% dup, ]

# Agreement between HGNC symbol and external gene name; w2kp indexes the
# rows where the two differ.
table(gene_BM[["hgnc_symbol"]] == gene_BM[["external_gene_name"]])
w2kp = which(gene_BM[["hgnc_symbol"]] != gene_BM[["external_gene_name"]])
gene_BM[w2kp,]
alias2Symbol function from limma.
a2s = rep(NA, length(assayed_genes))
# Map each assayed gene symbol to an official symbol with limma's
# alias2Symbol(), one gene at a time, storing the result in a2s.
# seq_along() (rather than 1:length()) keeps the loop from running on
# indices 1 and 0 when assayed_genes is empty.
for(i in seq_along(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)

  # Report aliases that map to more than one official symbol.
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }

  # ai[1] is NA when alias2Symbol() returns nothing, so unmapped genes
  # are recorded as NA.
  # NOTE(review): for ambiguous aliases the first hit is kept without
  # checking which one is intended. In this run SEPT2 is assigned SEPTIN6
  # (from "SEPTIN6" "SEPTIN2"), the same symbol SEPT6 maps to, which
  # produces a duplicated gene_symbol downstream. Confirm whether these
  # cases should be resolved by hand.
  a2s[i] = ai[1]
}
## [1] "HIST1H2BC"
## [1] "H2BC5" "H2BC4"
## [1] "MPP6"
## [1] "MPHOSPH6" "PALS2"
## [1] "MARS"
## [1] "MARS1" "SLA2"
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
##
## FALSE TRUE
## 1951 49
##
## FALSE TRUE <NA>
## 45 1906 49
# One row per assayed gene: the symbol as it appears in the data next to
# the symbol returned by limma (NA when limma found no match).
gene_info = data.table(
  sym_in_data = assayed_genes,
  sym_limma = a2s
)

# Genes whose limma symbol differs from the symbol in the data. Rows with
# an NA comparison (unmapped genes) are not selected.
gene_info[sym_in_data != sym_limma]
## sym_in_data sym_limma
## 1: ADPRHL2 ADPRS
## 2: AES TLE5
## 3: C12orf45 NOPCHAP1
## 4: C3orf58 DIPK2A
## 5: C6orf99 LINC02901
## 6: CBWD2 ZNG1B
## 7: CXorf57 RADX
## 8: FAM102A EEIG1
## 9: FAM122C PABIR3
## 10: FAM153C FAM153CP
## 11: FAM160A2 FHIP1B
## 12: GRASP TAMALIN
## 13: H2AFX H2AX
## 14: HIST1H2AG H2AC11
## 15: HIST1H2BC H2BC5
## 16: HIST1H2BK H2BC12
## 17: HIST1H2BN H2BC15
## 18: HIST1H3A H3C1
## 19: HIST1H3H H3C10
## 20: HIST1H4C H4C3
## 21: HIST2H2BF H2BC18
## 22: LRMP IRAG2
## 23: MFSD14C MFSD14CP
## 24: MKL1 MRTFA
## 25: MPP6 MPHOSPH6
## 26: RNASEH1-AS1 RNASEH1-DT
## 27: SEPT6 SEPTIN6
## 28: SEPT9 SEPTIN9
## 29: TMEM161B-AS1 TMEM161B-DT
## 30: ARNTL BMAL1
## 31: C6orf106 ILRUN
## 32: C6orf203 MTRES1
## 33: FAM129A NIBAN1
## 34: FAM160B1 FHIP2A
## 35: FAM192A PSME3IP1
## 36: HEXDC HEXD
## 37: HIST1H1E H1-4
## 38: KIAA0100 BLTP2
## 39: KIAA1551 RESF1
## 40: LARS LARS1
## 41: MARS MARS1
## 42: PLA2G16 PLAAT3
## 43: SEPT2 SEPTIN6
## 44: SMIM37 MTLN
## 45: YARS YARS1
## sym_in_data sym_limma
# gene_symbol defaults to the symbol used in the data ...
gene_info[, gene_symbol := sym_in_data]

# ... and is replaced by the limma symbol only where the two differ.
# which() drops NA comparisons, so genes limma could not map keep the
# symbol from the data.
renamed_rows = gene_info[, which(sym_in_data != sym_limma)]
gene_info[renamed_rows, gene_symbol := sym_limma]

dim(gene_info)
## [1] 2000 3
## sym_in_data sym_limma gene_symbol
## 1: ABCD3 ABCD3 ABCD3
## 2: ABCG1 ABCG1 ABCG1
## 3: ABHD5 ABHD5 ABHD5
## 4: ABI1 ABI1 ABI1
## 5: ABLIM1 ABLIM1 ABLIM1
## t1
## 1 2
## 1998 1
## sym_in_data sym_limma gene_symbol
## 1: SEPT6 SEPTIN6 SEPTIN6
## 2: SEPT2 SEPTIN6 SEPTIN6
Gene set annotations (by gene symbols) were downloaded from the MSigDB website.
# GMT files (gene symbols) for the three annotation collections, keyed by
# the short name used for each collection in the rest of the script.
gmtfile = list(
  reactome = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt",
  go_bp = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt",
  immune = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"
)

# Read every GMT file; the result keeps the names of gmtfile.
pathways = lapply(gmtfile, gmtPathways)
names(pathways)
## [1] "reactome" "go_bp" "immune"
## reactome go_bp immune
## 1692 7647 5219
Filter gene sets for size between 10 and 500.
## $reactome
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 7.0 9.0 12.0 17.0 23.0 31.0 44.0 71.8 120.9 1463.0
##
## $go_bp
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 6.0 8.0 10.0 14.0 19.0 29.0 46.0 80.8 183.0 1966.0
##
## $immune
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5 162 193 197 199 199 200 200 200 200 1992
## [1] 2000 3
# Over-representation analysis with goseq: every gene set with at least 10
# genes is tested against each annotation collection in `pathways`, using
# all assayed genes as the background.
#
# Result: goseq_res[["set_<k>"]][[<collection>]] holds the goseq table for
# gene set k, restricted to categories with FDR < 0.1 and to at most
# max_n2kp rows. Set/collection pairs with no significant category are not
# stored.
max_n2kp = 10

# The gene2cat mapping depends only on the annotation collection, so it is
# built once per collection rather than once per gene set.
# NOTE(review): reversemapping is an unexported goseq helper (`:::`), so it
# may change between goseq versions.
gene2cat = lapply(pathways, goseq:::reversemapping)

goseq_res = list()

for(k in seq_along(gene_sets)){
  # Skip gene sets that are too small to test.
  if(length(gene_sets[[k]]) < 10) { next }
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])

  # Indicator over all assayed genes: TRUE if the gene belongs to set k.
  # Membership is matched on the symbol used in the data; the vector is
  # named by the updated symbol so it can be matched to the annotations.
  # NOTE(review): gene_symbol contains a duplicate (SEPTIN6, from SEPT6 and
  # SEPT2), so names(genes) is not unique - confirm goseq tolerates this.
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol

  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    res1 = goseq(pwf, "hg38", "geneSymbol", gene2cat = gene2cat[[k1]])
    res1$FDR = p.adjust(res1$over_represented_pvalue, method="BH")

    # na.rm = TRUE keeps an NA FDR from turning the test below into
    # `if (NA)`, which would stop the loop with an error.
    nD = sum(res1$FDR < 0.1, na.rm = TRUE)

    if(nD > 0){
      # Keep the most significant categories, at most max_n2kp of them.
      res1 = head(res1[order(res1$FDR), ], min(nD, max_n2kp))

      # Make category names readable: drop the collection prefix, replace
      # underscores by spaces, lower-case, and truncate to 81 characters.
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)

      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
## [1] "ADK" "KIF9" "SNHG7" "UPF3A" "AP3M2" "APBB1IP"
## [7] "ATXN7L3B" "BROX" "C2orf68" "CYTIP" "ENY2" "IFITM2"
## [13] "IRAK4" "PKNOX1" "PLAC8" "PRMT2" "SDR39U1" "SEC14L1"
## [19] "SESN3" "SETD5" "SIGIRR" "STK17B"
## [1] 2
## [1] "CCM2" "CHURC1" "DNAAF2" "EVI2B" "FCER1G" "LEPROTL1"
## [7] "NMT2" "PTGER4" "RNF220" "SCPEP1" "SERTAD1" "SFI1"
## [13] "SNX18" "TMEM107" "TNFRSF25" "APOBEC3G" "EMP3" "FAM53B"
## [19] "KLRG1" "LAG3" "SCAMP4" "SPOCK2" "UBR2"
## [1] 3
## [1] "AC008105.3" "AC093323.1" "AK5" "AL359220.1" "ARRDC2"
## [6] "BOLA2-SMG1P6" "COQ8A" "GPCPD1" "IGLV1-44" "JCHAIN"
## [11] "LINC01215" "LRMP" "LST1" "MDS2" "MFSD14C"
## [16] "MUC20-OT1" "NAA16" "PLCL1" "SPART" "TMEM161B-AS1"
## [21] "TRAV38-2DV8" "TRBV7-3" "ZNF600" "ZNF749" "ZNF862"
## [1] 5
## [1] "AC007952.4" "AC013264.1" "AC025171.2" "AC087239.1" "AC245014.3"
## [6] "AL138963.3" "ANXA2R" "DELE1" "IGKV3-20" "JAML"
## [11] "LRRC8C-DT" "LRRN3" "NR4A2" "SCML4" "SNHG8"
## [16] "ST7L" "TBCC" "TMEM71" "TRAV8-6" "TRBV28"
## [21] "TRBV5-4" "WASHC4" "ZFP14" "ZNF506" "MATK"
## [1] 6
## [1] "AC145124.1" "ARHGAP15" "C12orf29" "COX10" "CST3"
## [6] "DPYD" "ERAP2" "IGLV2-14" "KCNK6" "TOMM7"
## [11] "TRAV12-2" "TRAV13-2" "TRAV21" "TRAV41" "TRBC1"
## [16] "TRBC2" "TRBV6-2" "TRBV6-6" "TSPOAP1-AS1" "TTC39C"
## [21] "UCP2" "CCL5" "CCR4" "FRMD4B" "GZMA"
## [1] 7
## [1] "AC009061.2" "AMD1" "CD96" "COA1" "HELQ"
## [6] "LDLRAP1" "RNASEK" "ARID5B" "CARD11" "CCDC112"
## [11] "CHD6" "COX17" "CRTC3" "ERICH1" "GON4L"
## [16] "KIAA1551" "KIF3B" "MIER1" "NADSYN1" "NBEAL2"
## [21] "SLC23A2" "STK10" "TCF25" "TRAC" "ZNF267"
## [1] 8
## [1] "ANAPC4" "BTG1" "BTG2" "CHD2" "EIF1" "EPHB6"
## [7] "FBXL3" "LYRM7" "PHC1" "PLK2" "PNRC1" "PPIL3"
## [13] "SFMBT1" "ZFP36L1" "CCDC12" "CGGBP1" "DPP7" "GPHN"
## [19] "PCGF5" "PUS7L" "SMAP1" "SREK1IP1"
## [1] 9
## [1] "ABHD5" "ATG13" "CD38" "FCGR3A" "LAX1" "MMP24OS"
## [7] "ODF2L" "PNPLA2" "RASA2" "RCSD1" "SNRK" "TESPA1"
## [13] "TMC8" "TRAV23DV6" "TRAV8-2" "TUBD1" "TUBE1" "ZBTB25"
## [19] "HSH2D" "IFI44" "IFI44L" "OAS1" "TRAT1"
## [1] 10
## [1] "ABCC10" "ABHD3" "AC116407.2" "AC118549.1" "ADTRP"
## [6] "C20orf204" "CARMIL2" "CFD" "FAAH2" "GPR132"
## [11] "HRH2" "LRRC58" "MIAT" "NORAD" "NRDC"
## [16] "OSM" "PCSK1N" "SLF2" "SLFN12L" "SPATA13"
## [21] "TOMM70" "TRANK1" "UBALD2" "Z93930.2" "ZFYVE28"
## [1] 11
## [1] "BDH1" "CCR6" "CD48" "LCLAT1" "PTPRCAP" "RNF139"
## [7] "SLC12A6" "SLC25A25" "TRAM1" "ATP8B2" "CD2" "CD58"
## [13] "CMTM6" "GIMAP4" "HM13" "LPCAT3" "LPCAT4" "LPGAT1"
## [19] "SLC12A9"
## [1] 12
## [1] "AC119396.1" "AF213884.3" "AL118516.1" "AL627171.1" "AL645728.1"
## [6] "C12orf57" "C6orf99" "CHRM3-AS2" "CSKMT" "EGR1"
## [11] "HIPK1-AS1" "HIST1H2BC" "ILF3-DT" "LINC00649" "LINC01550"
## [16] "METTL21A" "MID1IP1" "NPIPB11" "NUP58" "ORC4"
## [21] "TC2N" "TMEM154" "TRABD2A" "TMEM175"
## [1] 13
## [1] "AKT3" "C12orf45" "C1orf43" "EPHX2" "GTPBP6" "IP6K2"
## [7] "LBH" "NR1D1" "NR1D2" "PAPOLG" "PDCD7" "PDE3B"
## [13] "PHLDA1" "PPP1R15B" "TMEM63A" "TNFAIP8" "TTC3" "DHX29"
## [19] "EAPP" "EHMT1" "POGLUT1" "SPTLC2" "TCIRG1"
## [1] 14
## [1] "ACADSB" "ACSS1" "ANKH" "ARID4A" "ARMH1" "C7orf31"
## [7] "CAMK4" "CFAP36" "CTSF" "DAPP1" "FAM153C" "FCMR"
## [13] "FOSB" "FYB1" "HIST1H4C" "IER2" "IFRD1" "ING2"
## [19] "MTRNR2L12" "MXD1" "PLEKHM1" "RGCC" "TGIF1" "YY1AP1"
## [1] 15
## [1] "CHIC2" "DHRS3" "ADGRE5" "AKNA" "C12orf75"
## [6] "CARD16" "CD69" "GRK2" "KANSL1-AS1" "KIAA0040"
## [11] "MT1X" "MTRNR2L8" "NAA38" "NBDY" "NDUFC1"
## [16] "NSD3" "PAXX" "PTGER2" "PUM3" "RASAL3"
## [21] "TRAV29DV5" "TRBV12-3" "TSPAN32" "TUT4" "USP30-AS1"
## [1] 16
## [1] "AC015982.1" "AC027644.3" "AC084033.3" "AC097376.2" "AL135791.1"
## [6] "ARF4-AS1" "ARL4A" "CERNA1" "CHMP7" "FAM102A"
## [11] "FAM122C" "GOLGA8B" "IL23A" "LETM2" "LINC02265"
## [16] "LINC02273" "NDUFV2-AS1" "NOCT" "NPIPB5" "NSMCE3"
## [21] "PRR7" "RGS1" "THAP9-AS1" "ATAD2B" "NUP160"
## [1] 17
## [1] "AKIRIN1" "AKIRIN2" "GIMAP8" "OSER1" "PCMTD2" "PYROXD1" "RABL2B"
## [8] "STMN3" "TECPR1" "ZFX" "ZNF140" "CAMTA2" "DOCK11" "NDUFA3"
## [15] "NRROS" "OAS2" "PPP2R3C" "PSMB9" "RNF19A" "SRP54" "TBC1D14"
## [22] "TRAPPC8" "UBA7" "UTP25"
## [1] 18
## [1] "CD28" "DBP" "PECAM1" "BATF" "CD320" "CISH" "CMIP"
## [8] "FBXO9" "G2E3" "IFI35" "IL2RB" "IL2RG" "IL32" "INPP5D"
## [15] "MYOM2" "NQO2" "P4HTM" "RAPGEF1" "SLC35A2" "TIPARP" "UGCG"
## [22] "ZNF276" "ZNF708"
## [1] 19
## [1] "AC025159.1" "AC087623.3" "COQ7" "EFCAB2" "ENOSF1"
## [6] "FOXN3" "GADD45B" "GRASP" "KCNQ1OT1" "MBD6"
## [11] "NSUN6" "ODC1" "SLC22A17" "SNHG12" "TBCCD1"
## [16] "TCTA" "YPEL2" "AC022916.1" "ANKAR" "AREG"
## [21] "ITK" "SNHG9" "TAF4B" "TNFRSF4" "UBE3B"
## [1] 20
## [1] "C16orf74" "C3orf58" "CCDC141" "CCDC66" "CEP95" "CHMP1B"
## [7] "FAM117B" "IL6R" "MAST4" "PHYH" "PIK3IP1" "PNISR"
## [13] "RCAN3" "RIC1" "SH3YL1" "SLC44A1" "STX16" "TBCK"
## [19] "TSC22D2" "WARS2" "ADAM19" "C12orf4" "HELB" "WDR7"
## [1] 21
## [1] "ADPRHL2" "BTN3A1" "LTA4H" "PITPNA-AS1" "TMEM245"
## [6] "UTP6" "ALOX5AP" "APOL1" "APOL6" "BTN3A2"
## [11] "BUD23" "CTSW" "ECPAS" "GBP1" "GBP3"
## [16] "GBP5" "GZMM" "HEXDC" "MCTP2" "NCBP3"
## [21] "PEX11B" "SP140" "TAOK3" "TGFBR3" "TMEM62"
## [1] 22
## [1] "RBKS" "ZSCAN18" "ABHD2" "APH1B" "CITED4" "DDX3Y"
## [7] "EIF1AY" "GABPB2" "IL21R" "KDM5D" "KHNYN" "KIF21B"
## [13] "RNF157" "RPS4Y1" "RTKN2" "SBNO2" "SLC25A37" "SUSD6"
## [19] "TENT5C" "TMEM156" "TTTY15" "UTY"
## [1] 23
## [1] "ASAH1" "HVCN1" "MYADM" "OXLD1" "AGAP2" "ARHGAP4" "BLOC1S6"
## [8] "CD55" "CD7" "DIP2A" "GGCX" "IFNAR1" "IFNGR2" "MORC3"
## [15] "NME3" "NPDC1" "PIEZO1" "PSME2" "REXO2" "SCAF8" "TIMP1"
## [22] "UQCC2" "ZDHHC20"
## [1] 24
## [1] "COL18A1" "DALRD3" "NECAP2" "ADGRG1" "ARHGEF3" "ARL4C"
## [7] "COL6A3" "COLGALT1" "CX3CR1" "FGFBP2" "LAIR2" "LINC01871"
## [13] "LPIN1" "LPIN2" "LY96" "PCED1B" "PLEK" "RAP1GAP2"
## [19] "RNPEPL1" "SP140L" "TCAF2" "TTC16" "VPS13D" "ZNF683"
## [1] 25
## [1] "C6orf62" "CCNH" "COQ10B" "DGKA" "ERCC5" "FHIT" "INPP4B"
## [8] "KLHL24" "LEF1" "MYNN" "NFYB" "TCF7" "TPP2" "ACAP1"
## [15] "CREBZF" "ETNK1" "NARF" "NFE2L1" "PPRC1" "RHOH" "STAT4"
## [22] "TGS1" "VPS36" "XBP1"
## [1] 26
## [1] "AP002360.1" "ATG9B" "CITED2" "FBXO3" "KIAA1328"
## [6] "MAML2" "MBNL2" "MHENCR" "MLXIP" "MMP28"
## [11] "MTERF4" "PDE7A" "PLCD1" "RAB33B" "RETREG1"
## [16] "SLC8B1" "TAGAP" "THAP6" "TRAV8-3" "TSPYL4"
## [21] "WHAMM" "WSB1" "XIST" "ZNF10" "CCDC43"
## [1] 27
## [1] "CBR3" "DPEP2" "GGT7" "LIMS1" "NEU1" "SERTAD2"
## [7] "SLC25A32" "USP3" "ZNF677" "B3GALT4" "FKBP11" "FUT11"
## [13] "GALNS" "GZMB" "PARVG" "PLAA" "RSU1" "SACS"
## [19] "SH3BGRL3" "SRGN" "ST6GAL1" "TRIB2"
## [1] 28
## [1] "MZF1" "NSMAF" "YPEL5" "C4orf48" "CROT" "DENND4B" "EFR3A"
## [8] "EIF2AK4" "GNLY" "ISG20" "KCNAB2" "KLF9" "KLHDC4" "LTBP4"
## [15] "NEK9" "NLRC5" "NT5C" "PDE4B" "PREP" "PREX1" "PTPRE"
## [22] "RCBTB2" "SYTL1" "ZNF236" "ZNF292"
## [1] 29
## [1] "ABCG1" "AIF1" "ARHGAP9" "CD40LG" "CRLF3"
## [6] "DNASE1" "GIMAP1" "HIST1H3H" "IPCEF1" "JPX"
## [11] "KLF7" "LIPT1" "LYRM9" "NABP1" "NBPF14"
## [16] "NUAK2" "NUDT4" "RNASEH1-AS1" "SLC7A6" "SPIDR"
## [21] "STARD10" "STK17A" "ZFAS1" "ZMAT1" "ZNF84"
## [1] 30
## [1] "ATAD1" "CDC37L1" "COG5" "DIP2B" "DSE" "FCN1"
## [7] "HIVEP2" "HOXB2" "IGKV1-5" "IL16" "KLHL6" "LRRC8D"
## [13] "PARP16" "PITPNC1" "RAB37" "RPS26" "SIMC1" "SLC25A33"
## [19] "SLC26A11" "FAM129A" "GNPTAB" "JAKMIP1" "MT2A" "SERTAD3"
## [25] "SMPD2"
## [1] 31
## [1] "LINC00623" "POLD4" "TRAV12-1" "TRAV14DV4" "WAPL"
## [6] "ZNF91" "AC020915.3" "C1orf162" "CCL4" "CD300A"
## [11] "FCRL6" "GPR65" "HACD3" "MESD" "MFSD14A"
## [16] "NNT-AS1" "PARP11" "S100A12" "S1PR5" "SMIM37"
## [21] "THUMPD3-AS1" "TM2D1" "TMEM138" "TRBV6-1" "TSPAN14"
## [26] "TTC38"
## [1] 32
## [1] "AC004687.1" "AC004854.2" "AC016405.3" "AC020911.2" "AC023157.3"
## [6] "AC025164.1" "AC025171.3" "AC083880.1" "AC091271.1" "AC103591.3"
## [11] "AL121944.1" "AL139246.5" "AL357060.1" "AL451085.1" "ATP2B1-AS1"
## [16] "BX284668.6" "ID3" "LINC01465" "MATR3-1" "MZF1-AS1"
## [21] "NPIPB4" "OSER1-DT" "PARP8" "SDR42E2" "Z93241.1"
## [1] 33
## [1] "IER3" "PECR" "PPP2R5C" "ARHGAP10" "C1orf21" "CARD8-AS1"
## [7] "CST7" "DTX3L" "GMFB" "IGLV3-25" "IRF9" "MYO1F"
## [13] "NKG7" "ODF3B" "PARP14" "PARP9" "PEX26" "PLA2G16"
## [19] "PPP2R5B" "PTGDR" "RSAD2" "SLA" "TNFRSF18" "ZBP1"
## [1] 34
## [1] "ABLIM1" "ABTB1" "ATP5MG" "FBXO8" "HDHD2" "LIMD2"
## [7] "LINC00861" "LTB" "MCUB" "METAP1" "NOP53" "NT5DC1"
## [13] "PCNP" "RACK1" "SGSM3" "SMDT1" "TBC1D10C" "TBPL1"
## [19] "TRAV9-2" "CDK2AP2" "FGR" "NUS1" "PARP4" "S100A11"
## [1] 35
## [1] "CSRNP1" "GATA3" "KANSL2" "KDM6A" "NR4A3" "STK19" "TOB2"
## [8] "ZNF490" "ASCL2" "BHLHE40" "CNOT4" "GCN1" "KANSL1" "KMT2B"
## [15] "KMT2C" "PHF20" "PHF20L1" "RECK" "SH3BP5" "SYNE1" "TBX21"
## [22] "TNFSF10"
## [1] 36
## [1] "AC245297.3" "ADA2" "AL133415.1" "C1GALT1" "ERVK3-1"
## [6] "GIMAP6" "GZMK" "HEATR5B" "KLF10" "LEPROT"
## [11] "OTULINL" "SLC25A38" "TOX" "TRAV25" "TRBV6-5"
## [16] "BISPR" "CRIP2" "GAB3" "MIR4435-2HG" "MX2"
## [21] "MYBL1" "MYO1G" "SAMD9L" "SPON2" "SYNRG"
## [1] 37
## [1] "CDC42SE2" "FAM118A" "IER5" "KLF12" "MAP3K8" "MTO1"
## [7] "PRKCH" "RSRP1" "SENP7" "SLC12A7" "TBC1D7" "TCP11L2"
## [13] "TMEM204" "TTC31" "UBL3" "XRRA1" "ZFAND5" "ZNF831"
## [19] "CAST" "SAMD9" "SDF2" "TRIM22" "XAF1" "ZBTB8OS"
## [1] 38
## [1] "CD82" "FAM227B" "HIPK1" "IL27RA" "NEK1" "PLK3" "ST3GAL1"
## [8] "BCL9L" "DIAPH2" "FNDC3B" "GCA" "GIMAP7" "ITGAL" "LCP1"
## [15] "LY6E" "PHF11" "PHF23" "RBSN" "ROMO1" "TYMP"
## [1] 39
## [1] "COQ10A" "SLC2A3" "ZC3H12A" "ZC3H12D" "ZHX2"
## [6] "AC016831.7" "CCDC88B" "CEMIP2" "CRYBG1" "CTBS"
## [11] "DDIT4" "FRY" "GPRIN3" "ICOS" "KLF6"
## [16] "LTBP3" "NEAT1" "NFKBIZ" "PRDM2" "RNF145"
## [21] "RNMT" "SLC9A8" "TOB1" "ZNF101"
## [1] 40
## [1] "CLK4" "EPB41" "EVL" "FLI1" "LETMD1" "MALAT1" "NOSIP"
## [8] "POLL" "RGS10" "SEPT6" "SH2D1A" "SS18" "STARD7" "ZNF821"
## [15] "ACAP2" "ACAP3" "AFF1" "KMT2E" "LRRFIP1" "MLLT10" "PIM2"
## [22] "PVT1" "SELL"
# For every gene set that has stored enrichment results, print the set
# label, the genes in the set, and the per-collection result tables.
for(n1 in names(goseq_res)){
  print(n1)

  # Labels were built as "set_<k>"; stripping the prefix recovers the
  # index of the gene set in gene_sets.
  k = as.numeric(gsub("set_", "", n1))
  print(gene_sets[[k]])

  print(goseq_res[[n1]])
}
## [1] "set_9"
## [1] "ABHD5" "ATG13" "CD38" "FCGR3A" "LAX1" "MMP24OS"
## [7] "ODF2L" "PNPLA2" "RASA2" "RCSD1" "SNRK" "TESPA1"
## [13] "TMC8" "TRAV23DV6" "TRAV8-2" "TUBD1" "TUBE1" "ZBTB25"
## [19] "HSH2D" "IFI44" "IFI44L" "OAS1" "TRAT1"
## $immune
## category
## 5062 querec pbmc yf 17d vaccine age 18 45yo 3dy up
## 5063 querec pbmc yf 17d vaccine age 18 45yo 7dy up
## 2462 gse27241 ctrl vs digoxin treated rorgt ko cd4 tcell in th17 polarizing conditions
## 4972 howard t cell inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 1dy up
## 5102 zak pbmc mrkad5 hiv 1 gag pol nef age 20 50yo 3dy up
## 1289 gse17974 il4 and anti il12 vs untreated 24h act cd4 tcell dn
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 5062 7.573764e-06 0.9999998 5 27
## 5063 7.573764e-06 0.9999998 5 27
## 2462 8.505566e-05 0.9999971 4 22
## 4972 9.660053e-05 0.9999965 4 23
## 5102 1.058165e-04 0.9999937 5 45
## 1289 1.113896e-04 0.9999933 5 45
## FDR
## 5062 0.01933582
## 5063 0.01933582
## 2462 0.09479256
## 4972 0.09479256
## 5102 0.09479256
## 1289 0.09479256
##
## [1] "set_11"
## [1] "BDH1" "CCR6" "CD48" "LCLAT1" "PTPRCAP" "RNF139"
## [7] "SLC12A6" "SLC25A25" "TRAM1" "ATP8B2" "CD2" "CD58"
## [13] "CMTM6" "GIMAP4" "HM13" "LPCAT3" "LPCAT4" "LPGAT1"
## [19] "SLC12A9"
## $reactome
## category over_represented_pvalue
## 398 glycerophospholipid biosynthesis 2.354282e-05
## 721 phospholipid metabolism 1.799579e-04
## 31 acyl chain remodelling of pe 4.096445e-04
## 34 acyl chain remodelling of ps 4.096445e-04
## 32 acyl chain remodelling of pg 4.097818e-04
## under_represented_pvalue numDEInCat numInCat FDR
## 398 0.9999995 4 16 0.02827492
## 721 0.9999928 4 26 0.09842958
## 31 0.9999986 2 3 0.09842958
## 34 0.9999986 2 3 0.09842958
## 32 0.9999986 2 3 0.09842958
##
## $go_bp
## category over_represented_pvalue
## 2409 phosphatidylethanolamine acyl chain remodeling 3.028192e-06
## 2412 phosphatidylglycerol metabolic process 7.512040e-06
## under_represented_pvalue numDEInCat numInCat FDR
## 2409 1 3 4 0.01448384
## 2412 1 3 5 0.01796504
##
## $immune
## category
## 1106 gse17721 lps vs pam3csk4 1h bmdc up
## 3991 gse41867 memory vs exhausted cd8 tcell day30 lcmv dn
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1106 3.056575e-05 0.9999992 4 20
## 3991 3.718477e-05 0.9999983 5 42
## FDR
## 1106 0.09493271
## 3991 0.09493271
##
## [1] "set_18"
## [1] "CD28" "DBP" "PECAM1" "BATF" "CD320" "CISH" "CMIP"
## [8] "FBXO9" "G2E3" "IFI35" "IL2RB" "IL2RG" "IL32" "INPP5D"
## [15] "MYOM2" "NQO2" "P4HTM" "RAPGEF1" "SLC35A2" "TIPARP" "UGCG"
## [22] "ZNF276" "ZNF708"
## $reactome
## category over_represented_pvalue
## 491 interleukin 3 interleukin 5 and gm csf signaling 8.564168e-06
## 498 interleukin receptor shc signaling 5.799668e-05
## 995 signaling by interleukins 6.607657e-05
## 486 interleukin 2 family signaling 2.608409e-04
## under_represented_pvalue numDEInCat numInCat FDR
## 491 0.9999999 4 12 0.01028557
## 498 0.9999994 3 7 0.02645265
## 995 0.9999946 7 92 0.02645265
## 486 0.9999946 3 11 0.07831747
##
## [1] "set_21"
## [1] "ADPRHL2" "BTN3A1" "LTA4H" "PITPNA-AS1" "TMEM245"
## [6] "UTP6" "ALOX5AP" "APOL1" "APOL6" "BTN3A2"
## [11] "BUD23" "CTSW" "ECPAS" "GBP1" "GBP3"
## [16] "GBP5" "GZMM" "HEXDC" "MCTP2" "NCBP3"
## [21] "PEX11B" "SP140" "TAOK3" "TGFBR3" "TMEM62"
## $go_bp
## category
## 693 disruption of anatomical structure in another organism
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 693 6.700668e-07 1 5 17
## FDR
## 693 0.003204929
##
## $immune
## category
## 892 gse1740 mcsf vs mcsf and ifng day2 derived macrophage up
## 2497 gse2770 il12 and tgfb vs il4 treated act cd4 tcell 48h dn
## 895 gse1740 unstim vs ifna stimulated mcsf derived macrophage dn
## 4959 howard b cell inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 1dy up
## 5035 nakaya pbmc imuvac male age 14 27yo 1d postboost vs 0dy preimm tiv up
## 509 gse1432 1h vs 24h ifng microglia dn
## 4963 howard monocyte inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 1dy up
## 152 gse1112 hy cd8ab vs hy cd8aa thymocyte rtoc culture up
## 4966 howard neutrophil inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 3dy
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 892 1.905730e-06 0.9999999 6 32
## 2497 7.532723e-06 0.9999996 6 40
## 895 1.530649e-05 0.9999991 6 45
## 4959 6.683699e-05 0.9999992 3 7
## 5035 6.685649e-05 0.9999992 3 7
## 509 7.497802e-05 0.9999942 6 59
## 4963 1.194293e-04 0.9999899 6 64
## 152 1.368987e-04 0.9999913 5 41
## 4966 1.540312e-04 0.9999899 5 42
## FDR
## 892 0.009730658
## 2497 0.019231041
## 895 0.026051649
## 4959 0.063806293
## 5035 0.063806293
## 509 0.063806293
## 4963 0.087115163
## 152 0.087375595
## 4966 0.087387036
##
## [1] "set_24"
## [1] "COL18A1" "DALRD3" "NECAP2" "ADGRG1" "ARHGEF3" "ARL4C"
## [7] "COL6A3" "COLGALT1" "CX3CR1" "FGFBP2" "LAIR2" "LINC01871"
## [13] "LPIN1" "LPIN2" "LY96" "PCED1B" "PLEK" "RAP1GAP2"
## [19] "RNPEPL1" "SP140L" "TCAF2" "TTC16" "VPS13D" "ZNF683"
## $reactome
## category over_represented_pvalue
## 167 collagen biosynthesis and modifying enzymes 2.263001e-05
## 170 collagen formation 8.391148e-05
## 168 collagen chain trimerization 1.166352e-04
## 1170 triglyceride biosynthesis 1.197099e-04
## 1073 synthesis of pe 3.589348e-04
## under_represented_pvalue numDEInCat numInCat FDR
## 167 0.9999998 3 6 0.02717864
## 170 0.9999989 3 9 0.03594291
## 168 1.0000000 2 2 0.03594291
## 1170 1.0000000 2 2 0.03594291
## 1073 0.9999988 2 3 0.08621613
##
## [1] "set_25"
## [1] "C6orf62" "CCNH" "COQ10B" "DGKA" "ERCC5" "FHIT" "INPP4B"
## [8] "KLHL24" "LEF1" "MYNN" "NFYB" "TCF7" "TPP2" "ACAP1"
## [15] "CREBZF" "ETNK1" "NARF" "NFE2L1" "PPRC1" "RHOH" "STAT4"
## [22] "TGS1" "VPS36" "XBP1"
## $immune
## category over_represented_pvalue
## 2996 gse32901 th1 vs th17 enriched cd4 tcell up 1.917639e-05
## under_represented_pvalue numDEInCat numInCat FDR
## 2996 0.9999989 6 49 0.09791465
##
## [1] "set_27"
## [1] "CBR3" "DPEP2" "GGT7" "LIMS1" "NEU1" "SERTAD2"
## [7] "SLC25A32" "USP3" "ZNF677" "B3GALT4" "FKBP11" "FUT11"
## [13] "GALNS" "GZMB" "PARVG" "PLAA" "RSU1" "SACS"
## [19] "SH3BGRL3" "SRGN" "ST6GAL1" "TRIB2"
## $reactome
## category over_represented_pvalue
## 105 blood group systems biosynthesis 0.0001173633
## 42 aflatoxin activation and detoxification 0.0001262795
## 101 biological oxidations 0.0001807014
## under_represented_pvalue numDEInCat numInCat FDR
## 105 1.0000000 2 2 0.07234081
## 42 1.0000000 2 2 0.07234081
## 101 0.9999968 3 11 0.07234081
##
## [1] "set_33"
## [1] "IER3" "PECR" "PPP2R5C" "ARHGAP10" "C1orf21" "CARD8-AS1"
## [7] "CST7" "DTX3L" "GMFB" "IGLV3-25" "IRF9" "MYO1F"
## [13] "NKG7" "ODF3B" "PARP14" "PARP9" "PEX26" "PLA2G16"
## [19] "PPP2R5B" "PTGDR" "RSAD2" "SLA" "TNFRSF18" "ZBP1"
## $immune
## category
## 1456 gse19888 adenosine a3r inh vs act with inhibitor pretreatment in mast cell up
## 1460 gse19888 adenosine a3r inh vs tcell membranes act mast cell up
## 3886 gse40666 untreated vs ifna stim stat1 ko cd8 tcell 90min up
## 4432 gse5589 lps and il10 vs lps and il6 stim il6 ko macrophage 45min up
## 3962 gse41867 day15 effector vs day30 exhausted cd8 tcell lcmv clone13 up
## 379 gse13485 ctrl vs day7 yf17d vaccine pbmc dn
## 4965 howard neutrophil inact monov influenza a indonesia 05 2005 h5n1 age 18 49yo 1dy
## 2412 gse26890 cxcr1 neg vs pos effector cd8 tcell up
## 2348 gse26030 th1 vs th17 day5 post polarization up
## 3275 gse36078 wt vs il1r ko lung dc dn
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1456 1.598740e-06 0.9999999 7 49
## 1460 2.692155e-06 0.9999999 7 53
## 3886 3.504527e-06 0.9999999 6 35
## 4432 5.367951e-06 0.9999997 7 59
## 3962 1.159691e-05 0.9999994 6 47
## 379 1.540305e-05 0.9999989 7 68
## 4965 2.016917e-05 0.9999981 8 104
## 2412 3.365894e-05 0.9999978 6 53
## 2348 4.030853e-05 0.9999973 6 52
## 3275 4.338874e-05 0.9999979 5 36
## FDR
## 1456 0.005964705
## 1460 0.005964705
## 3886 0.005964705
## 4432 0.006852189
## 3962 0.011842768
## 379 0.013107997
## 4965 0.014711966
## 2412 0.021182552
## 2348 0.021182552
## 3275 0.021182552
##
## [1] "set_35"
## [1] "CSRNP1" "GATA3" "KANSL2" "KDM6A" "NR4A3" "STK19" "TOB2"
## [8] "ZNF490" "ASCL2" "BHLHE40" "CNOT4" "GCN1" "KANSL1" "KMT2B"
## [15] "KMT2C" "PHF20" "PHF20L1" "RECK" "SH3BP5" "SYNE1" "TBX21"
## [22] "TNFSF10"
## $reactome
## category
## 352 formation of wdr5 containing histone modifying complexes
## 295 epigenetic regulation of gene expression
## 153 chromatin modifying enzymes
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 352 1.711024e-10 1.0000000 7 16
## 295 5.818645e-08 1.0000000 7 38
## 153 4.107397e-05 0.9999975 6 61
## FDR
## 352 2.054940e-07
## 295 3.494096e-05
## 153 1.644328e-02
##
## $go_bp
## category
## 1115 histone modification
## 1112 histone h3 k4 methylation
## 1114 histone methylation
## 2369 peptidyl lysine methylation
## 2689 positive regulation of histone methylation
## 3476 regulation of dosage compensation by inactivation of x chromosome
## 3580 regulation of histone h3 k4 methylation
## 3147 protein methylation
## 3581 regulation of histone methylation
## 1113 histone h4 acetylation
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1115 7.962431e-07 1.0000000 6 29
## 1112 2.131837e-06 1.0000000 4 8
## 1114 3.822025e-06 1.0000000 4 10
## 2369 1.022126e-05 0.9999998 4 12
## 2689 1.771221e-05 0.9999999 3 5
## 3476 1.771221e-05 0.9999999 3 5
## 3580 1.771221e-05 0.9999999 3 5
## 3147 3.004498e-05 0.9999993 4 15
## 3581 3.708037e-05 0.9999997 3 7
## 1113 6.288136e-05 1.0000000 2 2
## FDR
## 1115 0.003808431
## 1112 0.005098289
## 1114 0.006093582
## 2369 0.012102503
## 2689 0.012102503
## 3476 0.012102503
## 3580 0.012102503
## 3147 0.017963144
## 3581 0.019706158
## 1113 0.030076153
##
## [1] "set_36"
## [1] "AC245297.3" "ADA2" "AL133415.1" "C1GALT1" "ERVK3-1"
## [6] "GIMAP6" "GZMK" "HEATR5B" "KLF10" "LEPROT"
## [11] "OTULINL" "SLC25A38" "TOX" "TRAV25" "TRBV6-5"
## [16] "BISPR" "CRIP2" "GAB3" "MIR4435-2HG" "MX2"
## [21] "MYBL1" "MYO1G" "SAMD9L" "SPON2" "SYNRG"
## $reactome
## category
## 239 diseases associated with o glycosylation of proteins
## 682 o linked glycosylation
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 239 6.447454e-05 0.9999999 2 3
## 682 1.245271e-04 0.9999997 2 4
## FDR
## 239 0.07477851
## 682 0.07477851
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8958389 478.5 16391124 875.4 NA 16391124 875.4
## Vcells 19173826 146.3 60252964 459.7 65536 77218972 589.2
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
## [2] GenomicFeatures_1.50.4
## [3] GenomicRanges_1.50.2
## [4] GenomeInfoDb_1.34.9
## [5] org.Hs.eg.db_3.16.0
## [6] AnnotationDbi_1.60.2
## [7] IRanges_2.32.0
## [8] S4Vectors_0.36.2
## [9] Biobase_2.58.0
## [10] BiocGenerics_0.44.0
## [11] goseq_1.50.0
## [12] geneLenDataBase_1.34.0
## [13] BiasedUrn_2.0.10
## [14] fgsea_1.24.0
## [15] biomaRt_2.54.1
## [16] limma_3.54.2
## [17] tidyr_1.3.0
## [18] ggpubr_0.6.0
## [19] ggplot2_3.4.2
## [20] data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-162 matrixStats_1.0.0
## [3] bitops_1.0-7 bit64_4.0.5
## [5] filelock_1.0.2 progress_1.2.2
## [7] httr_1.4.6 tools_4.2.3
## [9] backports_1.4.1 bslib_0.4.2
## [11] utf8_1.2.3 R6_2.5.1
## [13] mgcv_1.8-42 DBI_1.1.3
## [15] colorspace_2.1-0 withr_2.5.0
## [17] tidyselect_1.2.0 prettyunits_1.1.1
## [19] bit_4.0.5 curl_5.0.1
## [21] compiler_4.2.3 cli_3.6.1
## [23] xml2_1.3.4 DelayedArray_0.24.0
## [25] rtracklayer_1.58.0 sass_0.4.5
## [27] scales_1.2.1 rappdirs_0.3.3
## [29] Rsamtools_2.14.0 stringr_1.5.0
## [31] digest_0.6.31 rmarkdown_2.21
## [33] XVector_0.38.0 pkgconfig_2.0.3
## [35] htmltools_0.5.5 MatrixGenerics_1.10.0
## [37] dbplyr_2.3.2 fastmap_1.1.1
## [39] rlang_1.1.0 rstudioapi_0.14
## [41] RSQLite_2.3.1 BiocIO_1.8.0
## [43] jquerylib_0.1.4 generics_0.1.3
## [45] jsonlite_1.8.4 BiocParallel_1.32.6
## [47] dplyr_1.1.2 car_3.1-2
## [49] RCurl_1.98-1.12 magrittr_2.0.3
## [51] GO.db_3.16.0 GenomeInfoDbData_1.2.9
## [53] Matrix_1.6-4 Rcpp_1.0.10
## [55] munsell_0.5.0 fansi_1.0.4
## [57] abind_1.4-5 lifecycle_1.0.3
## [59] stringi_1.7.12 yaml_2.3.7
## [61] carData_3.0-5 SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0 BiocFileCache_2.6.1
## [65] grid_4.2.3 blob_1.2.4
## [67] parallel_4.2.3 crayon_1.5.2
## [69] lattice_0.20-45 splines_4.2.3
## [71] Biostrings_2.66.0 cowplot_1.1.1
## [73] hms_1.1.3 KEGGREST_1.38.0
## [75] knitr_1.44 pillar_1.9.0
## [77] rjson_0.2.21 ggsignif_0.6.4
## [79] codetools_0.2-19 fastmatch_1.1-3
## [81] XML_3.99-0.14 glue_1.6.2
## [83] evaluate_0.20 png_0.1-8
## [85] vctrs_0.6.2 gtable_0.3.3
## [87] purrr_1.0.1 cachem_1.0.7
## [89] xfun_0.39 broom_1.0.4
## [91] restfulr_0.0.15 rstatix_0.7.2
## [93] tibble_3.2.1 GenomicAlignments_1.34.1
## [95] memoise_2.0.1